# Ariga, Kenichi "Incumbency Disadvantage Under Electoral Rules with
# Intra-Party Competition: Evidence from Japan"
#
# Replication program 2
#
# This replication program will apply the method of Angrist and Rokkanen (2013),
# which was adapted to the election context by Hainmueller, Hall, and Snyder (2015).
#
# The following figures and tables will be produced:
#      Figures A2 through A4 of the online appendix; and
#      Tables A2 through A3 of the online appendix.

rm(list=ls())

# Set working directory.
# The outputs of this code will be saved in this directory.
setwd("")

# Load the data.
load("IncDisadvJPN.RData")

# Load the packages.
library(sandwich)
library(lmtest)
library(Matching)
library(rgenoud)

# Load the replication data and choose the observations included in the analysis.
# See Replication1.R for an explanation on the selection of the observations.
data <- LDP
data <- data[which(data$year!=1958 & data$year!=1993),]
data <- data[which(data$dm>=3 & data$dm<=5),]
data <- subset(data, after.redist!=1)
data <- subset(data, next.after.redist!=1)
data <- subset(data, next.ptyid=="LDP")
data <- subset(data, next.d.numinc>=1 & next.d.numcan.samepty > next.d.numinc)

# Test on the conditional independence ---> Figure A2
data$lag.d.vts2 <- data$lag.d.vts
data$lag.d.vts2[is.na(data$lag.d.vts)] <- 0

# Incumbents
window <- seq(0.01,0.15,0.01)

for (w in 1:length(window)) {

    est.data <- subset(data,win==1 & d.vts.mgn<=window[w])

    reg <- lm(next.run.win ~ d.vts.mgn
                            + lag.run.cand.samedist
                            + lag.win.samedist
                            + lag.d.vts2
                            + numrun
                            + senior
                            + age
                            + female
                            + quality.cand
                            + dm
                            + lag.d.ptyvts
               ,
       data=est.data)

    all.obs <- dim(subset(data, win==1))[1]
    included.obs <- dim(subset(data, win==1 & d.vts.mgn<=window[w]))[1]
    coverage <- included.obs/all.obs

    # SE clustered in candid
    # Adaptation of an R code of Mahmood Arai
    # http://people.su.se/~ma/mcluster.R
    N <- length(est.data$candid)
    K <- reg$rank
    M <- length(unique(est.data$candid))
    dfc <- (M/(M-1))*((N-1)/(N-K))
    uj <- apply(estfun(reg),2, function(x) tapply(x, est.data$candid, sum))
    vcov.cl <- dfc*sandwich(reg, meat=crossprod(uj)/N)
    se.cl <- sqrt(diag(vcov.cl))
    t.cl <- coefficients(reg)["d.vts.mgn"]/se.cl["d.vts.mgn"]
    pvalue.cl <- 2*(1-pnorm(abs(t.cl)))
    test.cl <- coeftest(reg,vcov.cl)

    results <- c(window[w],coefficients(reg)["d.vts.mgn"],summary(reg)$coefficients["d.vts.mgn",4],pvalue.cl["d.vts.mgn"],all.obs,included.obs,coverage)
    names(results) <- c("window","coefficients","pvalue","pvalue.cl","total obs","included obs","coverage")

    if (w==1) {
        INC <- results
    } else if (w>=2) {
        INC <- rbind(INC,results)
    }
}

INC <- data.frame(INC)
dev.new()
plot(INC$window*100,INC$pvalue.cl, type="n",
     yaxt="n",
     xlab="Winning Vote Margin Window (%)", ylab="p-value", cex.lab=1.5,
     ylim=c(0,1), xlim=c(0,15))
with(subset(INC,pvalue.cl>0.10),
     points(window*100,pvalue.cl,pch=19,cex=1.5,col="black"))
with(subset(INC,pvalue.cl<=0.10),
     points(window*100,pvalue.cl,pch=19,cex=1.5,col="gray75"))
abline(h=0.05, lty=2, col="black")
abline(h=0.10, lty=3, col="black")
axis(2, at=c(seq(0,0.1,0.05),seq(0.1,1,0.1)),
     labels=sprintf("%1.2f",c(seq(0,0.1,0.05),seq(0.1,1,0.1))), las=2, cex.axis=1)

dev.copy(png,"Fig_A2_a_CItest_Inc.png")
dev.off()

# Non-Incumbents
 window <- seq(-0.01,-0.15,-0.01)

for (w in 1:length(window)) {

    est.data <- subset(data, win==0 & d.vts.mgn>=window[w])

    reg <- lm(next.run.win ~ d.vts.mgn
                            + lag.run.cand.samedist
                            + lag.win.samedist
                            + lag.d.vts2
                            + numrun
                            + senior
                            + age
                            + female
                            + quality.cand
                            + dm
                            + lag.d.ptyvts
               ,
       data=est.data)

    all.obs <- dim(subset(data, win==0))[1]
    included.obs <- dim(subset(data, win==0 & d.vts.mgn>=window[w]))[1]
    coverage <- included.obs/all.obs

    N <- length(est.data$candid)
    K <- reg$rank
    M <- length(unique(est.data$candid))
    dfc <- (M/(M-1))*((N-1)/(N-K))
    uj <- apply(estfun(reg),2, function(x) tapply(x, est.data$candid, sum))
    vcov.cl <- dfc*sandwich(reg, meat=crossprod(uj)/N)
    se.cl <- sqrt(diag(vcov.cl))
    t.cl <- coefficients(reg)["d.vts.mgn"]/se.cl["d.vts.mgn"]
    pvalue.cl <- 2*(1-pnorm(abs(t.cl)))
    test.cl <- coeftest(reg,vcov.cl)

    results <- c(window[w],coefficients(reg)["d.vts.mgn"],summary(reg)$coefficients["d.vts.mgn",4],pvalue.cl["d.vts.mgn"],all.obs,included.obs,coverage)
    names(results) <- c("window","coefficients","pvalue","pvalue.cl","total obs","included obs","coverage")

    if (w==1) {
        NON.INC <- results
    } else if (w>=2) {
        NON.INC <- rbind(NON.INC,results)
    }
}

dev.new()
NON.INC <- data.frame(NON.INC)
plot(NON.INC$window*100,NON.INC$pvalue.cl, type="n",
     yaxt="n",
     xlab="Losing Vote Margin Window (%)", ylab="p-value", cex.lab=1.5,
     ylim=c(0,1), xlim=c(0,-15))

with(subset(NON.INC,pvalue.cl>0.10),
     points(window*100,pvalue.cl,pch=19,cex=1.5,col="black"))
with(subset(NON.INC,pvalue.cl<=0.10),
     points(window*100,pvalue.cl,pch=19,cex=1.5,col="gray75"))
abline(h=0.05, lty=2, col="black")
abline(h=0.10, lty=3, col="black")
axis(2, at=c(seq(0,0.1,0.05),seq(0.1,1,0.1)),
     labels=sprintf("%1.2f",c(seq(0,0.1,0.05),seq(0.1,1,0.1))), las=2, cex.axis=1)

dev.copy(png,"Fig_A2_b_CItest_NonInc.png")
dev.off()


# Estimate ATT of incumbency using the genetic matching.
#
# To produce the same result in the online appendix of the article, the following seed values
# for the random number generator should be used. If these values are set at different values,
# the result will not be exactly the same as the one in the online appendix of the article,
# as the genetic optimization used in the genetic matching is a stochastic algorithm.
seed1 <- 7777
seed2 <- 3333

match.data <- subset(data, d.vts.mgn<=0.05 & d.vts.mgn>=-0.01)
# Total observations in the dataset ("data").
# N = 1773 (Inc) + 839 (Non-inc) = 2612
# Observations in the dataset used for matching ("match.data").
# Incumbents
#   =< 5%: (1025 / 1773) * 100 = 57.81% of the incumbents included in the analysis.
# Non Incumbents
#   >= -1%: (208 / 839) * 100 = 24.79% of the non-incumbents included in the analysis.

# Outcome and treatment variables
Y <- match.data$next.run.win
Y2 <- match.data$next.run.cand
Tr <- match.data$win

# DM is changed to a dichotomous variable for each value of DM.
match.data$dm3 <- 0
match.data$dm3[match.data$dm==3] <- 1
match.data$dm4 <- 0
match.data$dm4[match.data$dm==4] <- 1
match.data$dm5 <- 0
match.data$dm5[match.data$dm==5] <- 1

# Covariates to be balanced.
attach(match.data)
BlMx <-cbind(lag.run.cand.samedist
                , lag.win.samedist
                , lag.d.vts2
                , numrun
                , senior
                , age
                , female
                , quality.cand
                , dm3
                , dm4
                , dm5
                , lag.d.ptyvts
             )
detach(match.data)

# Genetic matching
gen.match <- GenMatch(Tr=Tr, X=BlMx, BalanceMatrix=BlMx,
                      estimand="ATT",
                      pop.size=1000,
                      hard.generation.limit=TRUE,
                      max.generations=100,
                      wait.generations=4,
                      M=3,
                      loss=2,
                      int.seed=seed1,
                      unif.seed=seed2)

# ATT in Pr(R&W)
mgen.match <- Match(Y=Y, Tr=Tr, X=BlMx, Weight.matrix = gen.match)
summary(mgen.match)

# ATT in Pr(W)
mgen.match2 <- Match(Y=Y2, Tr=Tr, X=BlMx, Weight.matrix = gen.match)
summary(mgen.match2)

# Record ATTs in Pr(R&W) and Pr(W) ---> Table A3
MatchEst <- matrix(NA,2,4)
colnames(MatchEst) <- c("ATT","se","lb90","ub90")
rownames(MatchEst) <- c("Pr(R&W)","Pr(R)")
MatchEst[1,"ATT"] <- mgen.match$est
MatchEst[1,"se"] <- mgen.match$se
MatchEst[1,"lb90"] <- MatchEst[1,"ATT"] + qnorm(0.025)*MatchEst[1,"se"]
MatchEst[1,"ub90"] <- MatchEst[1,"ATT"] + qnorm(0.975)*MatchEst[1,"se"]
MatchEst[2,"ATT"] <- mgen.match2$est
MatchEst[2,"se"] <- mgen.match2$se
MatchEst[2,"lb90"] <- MatchEst[2,"ATT"] + qnorm(0.025)*MatchEst[2,"se"]
MatchEst[2,"ub90"] <- MatchEst[2,"ATT"] + qnorm(0.975)*MatchEst[2,"se"]

write.csv(MatchEst,file="Table_A3_GenMatchATT.csv")

# Draw a figure of ATTs in Pr(R&W) and Pr(W) ---> Figure A4
MatchEst <- data.frame(MatchEst)
attach(MatchEst)
dev.new(width=10,height=2)
par(mar=c(2,7,0.25,0.25))
plot(ATT*100, index, type="n",
     yaxt="n", xaxt="n", xlab="", ylab="",
     ylim=c(1,4), xlim=c(-40,40)
     )
abline(v=0,lt=2,lwd=1.5,col="gray75")
points(ATT*100, index, pch=19, col="black")
segments(x0=lb90*100, x1=ub90*100, y0=index, y1=index,
         lty=1,lwd=2.5,col="black")
axis(1, at=seq(-40,40,10),
     labels=sprintf("%1.0f",seq(-40,40,10)),
     las=1, cex.axis=1.5)
axis(2, at=c(2,3),
     labels=c("Pr(R)","Pr(R&W)"),
     las=1, cex.axis=1.5,tick=FALSE)
dev.copy(png,paste("Fig_A4_GenMatchATT.png",sep=""),
         width=10,height=2,units="in",res=120)
dev.off()
detach(MatchEst)

# Covariate balance check
gen.match.balance <- MatchBalance(Tr ~ lag.run.cand.samedist
                + lag.win.samedist
                + lag.d.vts2
                + numrun
                + senior
                + age
                + female
                + quality.cand
                + dm3
                + dm4
                + dm5
                + lag.d.ptyvts
               ,
       data=match.data, match.out=mgen.match, nboots=1000)


# Collect the results of the balance check ---> Table A2
ncov <- 12

name.cov <- c(  "Ran in t-1",         # lag.run.cand.samedist
                "Won a seat in t-1",  # lag.win.samedist
                "Vote Share in t-1",  # lag.d.vts2
                "No. of past attempts, before t",    # numrun
                "No. of past victories, before t",    # senior
                "Age",       # age
                "Female",    # female
                "Candidate quality",       # quality.cand
                "DM = 3",      # dm3
                "DM = 4",      # dm4
                "DM = 5",      # dm5
                "LDP's vote share in t-1"  # lag.d.ptyvts
                                )

balance <- matrix(NA,ncov,15)
rownames(balance) <- name.cov
for (i in 1:ncov){
        balance[i,1] <- gen.match.balance$BeforeMatching[[i]]$mean.Tr
        balance[i,2] <- gen.match.balance$BeforeMatching[[i]]$mean.Co
        balance[i,3] <- gen.match.balance$AfterMatching[[i]]$mean.Co

        balance[i,4] <- gen.match.balance$BeforeMatching[[i]]$mean.Tr - gen.match.balance$BeforeMatching[[i]]$mean.Co
        balance[i,5] <- gen.match.balance$AfterMatching[[i]]$mean.Tr  - gen.match.balance$AfterMatching[[i]]$mean.Co
        balance[i,6] <-100*(abs(balance[i,4])-abs(balance[i,5]))/abs(balance[i,4])

        balance[i,7] <- gen.match.balance$BeforeMatching[[i]]$qqsummary.raw$meandiff
        balance[i,8] <- gen.match.balance$AfterMatching[[i]]$qqsummary.raw$meandiff
        balance[i,9] <-100*(abs(balance[i,7])-abs(balance[i,8]))/abs(balance[i,7])

        balance[i,10] <- gen.match.balance$BeforeMatching[[i]]$qqsummary.raw$maxdiff
        balance[i,11] <- gen.match.balance$AfterMatching[[i]]$qqsummary.raw$maxdiff
        balance[i,12] <-100*(abs(balance[i,10])-abs(balance[i,11]))/abs(balance[i,10])

        balance[i,13] <- gen.match.balance$BeforeMatching[[i]]$qqsummary.raw$mediandiff
        balance[i,14] <- gen.match.balance$AfterMatching[[i]]$qqsummary.raw$mediandiff
        balance[i,15] <-100*(abs(balance[i,13])-abs(balance[i,14]))/abs(balance[i,13])
}

colnames(balance) <- c("tr.mean","bf.co.mean","af.co.mean",
                       "bf.mdiff","af.mdiff","mdiff.imp",
                       "bf.eQQ.mean.diff","af.eQQ.mean.diff","eQQ.mean.diff.imp",
                       "bf.eQQ.max.diff","af.eQQ.max.diff","eQQ.max.diff.imp",
                       "bf.eQQ.med.diff","af.eQQ.med.diff","eQQ.med.diff.imp"
                       )

write.csv(balance,file="Table_A2_CovariateBalance.csv")

# Empirical QQ plots --> Figure A3

plot.pch <- 16
plot.cex <- 1.5
plot.col <- "gray"

# lag.d.vts2
dev.new(width=10,height=5)
par(mfrow=c(1,2))

qqplot(match.data$lag.d.vts2[match.data$win==0],
       match.data$lag.d.vts2[match.data$win==1],
       pch=plot.pch, col=plot.col, cex=plot.cex,
       ylim=c(0,0.4), xlim=c(0,0.4),
       ylab="Vote share in t-1 (Incumbents)",
       xlab="Vote share in t-1 (Non-Incumbents)",
       cex.lab=1.25,
       main="Before Matching")
abline(coef=c(0,1))

qqplot(match.data$lag.d.vts2[mgen.match$index.control],
       match.data$lag.d.vts2[mgen.match$index.treated],
       pch=plot.pch, col=plot.col, cex=plot.cex,
       ylim=c(0,0.4), xlim=c(0,0.4),
       ylab="Vote share in t-1 (Incumbents)",
       xlab="Vote share in t-1 (Non-Incumbents)",
       cex.lab=1.25,
       main="After Genetic Matching")
abline(coef=c(0,1))

dev.copy(png,"Fig_A3_eQQ1_lag.d.vts2.png",
     width=10,height=5,units="in",res=120)
dev.off()


# numrun
dev.new(width=10,height=5)
par(mfrow=c(1,2))

qqplot(match.data$numrun[match.data$win==0],
       match.data$numrun[match.data$win==1],
       pch=plot.pch, col=plot.col, cex=plot.cex,
       ylim=c(0,20), xlim=c(0,20),
       ylab="No. of past attempts, before t (Incumbents)",
       xlab="No. of past attempts, before t (Non-Incumbents)",
       cex.lab=1.25,
       main="Before Matching")
abline(coef=c(0,1))

qqplot(match.data$numrun[mgen.match$index.control],
       match.data$numrun[mgen.match$index.treated],
       pch=plot.pch, col=plot.col, cex=plot.cex,
       ylim=c(0,20), xlim=c(0,20),
       ylab="No. of past attempts, before t (Incumbents)",
       xlab="No. of past attempts, before t (Non-Incumbents)",
       cex.lab=1.25,
       main="After Genetic Matching")
abline(coef=c(0,1))

dev.copy(png,"Fig_A3_eQQ2_numrun.png",
     width=10,height=5,units="in",res=120)
dev.off()


# senior
dev.new(width=10,height=5)
par(mfrow=c(1,2))

qqplot(match.data$senior[match.data$win==0],
       match.data$senior[match.data$win==1],
       pch=plot.pch, col=plot.col, cex=plot.cex,
       ylim=c(0,20), xlim=c(0,20),
       ylab="No. of past victories, before t (Incumbents)",
       xlab="No. of past victories, before t (Non-Incumbents)",
       cex.lab=1.25,
       main="Before Matching")
abline(coef=c(0,1))

qqplot(match.data$senior[mgen.match$index.control],
       match.data$senior[mgen.match$index.treated],
       pch=plot.pch, col=plot.col, cex=plot.cex,
       ylim=c(0,20), xlim=c(0,20),
       ylab="No. of past victories, before t (Incumbents)",
       xlab="No. of past victories, before t (Non-Incumbents)",
       cex.lab=1.25,
       main="After Genetic Matching")
abline(coef=c(0,1))

dev.copy(png,"Fig_A3_eQQ3_senior.png",
     width=10,height=5,units="in",res=120)
dev.off()


# age
dev.new(width=10,height=5)
par(mfrow=c(1,2))

qqplot(match.data$age[match.data$win==0],
       match.data$age[match.data$win==1],
       pch=plot.pch, col=plot.col, cex=plot.cex,
       ylim=c(20,90), xlim=c(20,90),
       ylab="Age (Incumbents)",
       xlab="Age (Non-Incumbents)",
       cex.lab=1.25,
       main="Before Matching")
abline(coef=c(0,1))

qqplot(match.data$age[mgen.match$index.control],
       match.data$age[mgen.match$index.treated],
       pch=plot.pch, col=plot.col, cex=plot.cex,
       ylim=c(20,90), xlim=c(20,90),
       ylab="Age (Incumbents)",
       xlab="Age (Non-Incumbents)",
       cex.lab=1.25,
       main="After Genetic Matching")
abline(coef=c(0,1))

dev.copy(png,"Fig_A3_eQQ4_age.png",
     width=10,height=5,units="in",res=120)
dev.off()


# lag.d.ptyvts
dev.new(width=10,height=5)
par(mfrow=c(1,2))

qqplot(match.data$lag.d.ptyvts[match.data$win==0],
       match.data$lag.d.ptyvts[match.data$win==1],
       pch=plot.pch, col=plot.col, cex=plot.cex,
       ylim=c(0,1), xlim=c(0,1),
       ylab="LDP's vote share in t-1 (Incumbents)",
       xlab="LDP's vote share in t-1 (Non-Incumbents)",
       cex.lab=1.25,
       main="Before Matching")
abline(coef=c(0,1))

qqplot(match.data$lag.d.ptyvts[mgen.match$index.control],
       match.data$lag.d.ptyvts[mgen.match$index.treated],
       pch=plot.pch, col=plot.col, cex=plot.cex,
       ylim=c(0,1), xlim=c(0,1),
       ylab="LDP's vote share in t-1 (Incumbents)",
       xlab="LDP's vote share in t-1 (Non-Incumbents)",
       cex.lab=1.25,
       main="After Genetic Matching")
abline(coef=c(0,1))

dev.copy(png,"Fig_A3_eQQ5_lag.d.ptyvts.png",
     width=10,height=5,units="in",res=120)
dev.off()

graphics.off()
